'''skew normal and skew t distribution by Azzalini, A. & Capitanio, A.
and Gram-Charlier expansion distribution (using 4 moments)

this is the not cleaned development file
'''

from scipy import stats, special
from scipy.stats import distributions
import numpy as np

class SkewNorm_gen(distributions.rv_continuous):
    def __init__(self):
        #super(SkewNorm_gen,self).__init__(
        distributions.rv_continuous.__init__(self,
            name = 'Skew Normal distribution', shapes = 'alpha',
            extradoc = ''' '''                                 )
        
    def _argcheck(self, alpha):
        return 1 #(alpha >= 0)
    
    def _rvs(self, alpha):
        # see http://azzalini.stat.unipd.it/SN/faq.html
        delta = alpha/np.sqrt(1+alpha**2)
        u0 = stats.norm.rvs(size=self._size)
        u1 = delta*u0 + np.sqrt(1-delta**2)*stats.norm.rvs(size=self._size)
        return np.where(u0>0, u1, -u1)

    def _munp(self, n, alpha):
        # use pdf integration with _mom0_sc if only _pdf is defined.
        # default stats calculation uses ppf, which is much slower
        return self._mom0_sc(n, alpha)

    def _pdf(self,x,alpha):
        # 2*normpdf(x)*normcdf(alpha*x)
        return 2.0/np.sqrt(2*np.pi)*np.exp(-x**2/2.0) * special.ndtr(alpha*x)

    def _stats_skip(self,x,alpha,moments='mvsk'):
        #skip for now to force moment integration as check
        pass

def example_n():
    skewnorm = SkewNorm_gen()
    print skewnorm.pdf(1,0), stats.norm.pdf(1), skewnorm.pdf(1,0) - stats.norm.pdf(1)
    print skewnorm.pdf(1,1000), stats.chi.pdf(1,1), skewnorm.pdf(1,1000) - stats.chi.pdf(1,1)
    print skewnorm.pdf(-1,-1000), stats.chi.pdf(1,1), skewnorm.pdf(-1,-1000) - stats.chi.pdf(1,1)
    rvs = skewnorm.rvs(0,size=500)
    print 'sample mean var: ', rvs.mean(), rvs.var()
    print 'theoretical mean var', skewnorm.stats(0)
    rvs = skewnorm.rvs(5,size=500)
    print 'sample mean var: ', rvs.mean(), rvs.var()
    print 'theoretical mean var', skewnorm.stats(5)
    print skewnorm.cdf(1,0), stats.norm.cdf(1), skewnorm.cdf(1,0) - stats.norm.cdf(1)
    print skewnorm.cdf(1,1000), stats.chi.cdf(1,1), skewnorm.cdf(1,1000) - stats.chi.cdf(1,1)
    print skewnorm.sf(0.05,1000), stats.chi.sf(0.05,1), skewnorm.sf(0.05,1000) - stats.chi.sf(0.05,1)

# generated the same way as distributions in stats.distributions
class SkewNorm2_gen(distributions.rv_continuous):
    def _argcheck(self, alpha):
        return 1 #where(alpha>=0, 1, 0)  

    def _pdf(self,x,alpha):
        # 2*normpdf(x)*normcdf(alpha*x
        return 2.0/np.sqrt(2*np.pi)*np.exp(-x**2/2.0) * special.ndtr(alpha*x)

skewnorm2 = SkewNorm2_gen(name = 'Skew Normal distribution', shapes = 'alpha',
                          extradoc = '''  -inf < alpha < inf''')



class ACSkewT_gen(distributions.rv_continuous):
    def __init__(self):
        #super(SkewT_gen,self).__init__(
        distributions.rv_continuous.__init__(self,
            name = 'Skew T distribution', shapes = 'alpha',
            extradoc = '''
Skewed T distribution by Azzalini, A. & Capitanio, A. (2003)_

the pdf is given by:
 pdf(x) = 2.0 * t.pdf(x, df) * t.cdf(df+1, alpha*x*np.sqrt((1+df)/(x**2+df)))

with alpha >=0

Note: different from skewed t distribution by Hansen 1999
.._
Azzalini, A. & Capitanio, A. (2003), Distributions generated by perturbation of
symmetry with emphasis on a multivariate skew-t distribution,
appears in J.Roy.Statist.Soc, series B, vol.65, pp.367-389

'''                               )
        
    def _argcheck(self, df, alpha):
        return (alpha == alpha)*(df>0)

##    def _arg_check(self, alpha):
##        return np.where(alpha>=0, 0, 1)
##    def _argcheck(self, alpha):
##        return np.where(alpha>=0, 1, 0)
                                             
    def _rvs(self, df, alpha):
        # see http://azzalini.stat.unipd.it/SN/faq.html
        #delta = alpha/np.sqrt(1+alpha**2)
        V = stats.chi2.rvs(df, size=self._size)
        z = skewnorm.rvs(alpha, size=self._size)                                
        return z/np.sqrt(V/df)                                            

    def _munp(self, n, df, alpha):
        # use pdf integration with _mom0_sc if only _pdf is defined.
        # default stats calculation uses ppf
        return self._mom0_sc(n, df, alpha)

    def _pdf(self,x,df,alpha):
        # 2*normpdf(x)*normcdf(alpha*x)
        return 2.0*distributions.t._pdf(x, df) * special.stdtr(df+1, alpha*x*np.sqrt((1+df)/(x**2+df)))

def example_T():
    skewt = ACSkewT_gen()
    rvs = skewt.rvs(10,0,size=500)
    print 'sample mean var: ', rvs.mean(), rvs.var()
    print 'theoretical mean var', skewt.stats(10,0)
    print 't mean var', stats.t.stats(10)
    print skewt.stats(10,1000) # -> folded t distribution, as alpha -> inf
    rvs = np.abs(stats.t.rvs(10,size=1000))
    print rvs.mean(), rvs.var()


def mvsk2cm(*args):
    mu,sig,sk,kur = args
    # Get central moments
    cnt = [None]*4
    cnt[0] = mu
    cnt[1] = sig #*sig
    cnt[2] = sk * sig**1.5
    cnt[3] = (kur+3.0) * sig**2.0
    return cnt


def mvsk2m(args):
    mc, mc2, skew, kurt = args#= self._stats(*args,**mdict)
    mnc = mc
    mnc2 = mc2 + mc*mc
    mc3  = skew*(mc2**1.5) # 3rd central moment
    mnc3 = mc3+3*mc*mc2+mc**3 # 3rd non-central moment
    mc4  = (kurt+3.0)*(mc2**2.0) # 4th central moment
    mnc4 = mc4+4*mc*mc3+6*mc*mc*mc2+mc**4
    return (mc, mc2, mc3, mc4), (mnc, mnc2, mnc3, mnc4)

def mc2mvsk(args):
    mc, mc2, mc3, mc4 = args
    skew = mc3 / mc2**1.5
    kurt = mc4 / mc2**2.0 - 3.0
    return (mc, mc2, skew, kurt)

def m2mc(args):
    mnc, mnc2, mnc3, mnc4 = args
    mc = mnc
    mc2 = mnc2 - mnc*mnc
    #mc3  = skew*(mc2**1.5) # 3rd central moment
    mc3 = mnc3 - (3*mc*mc2+mc**3) # 3rd central moment
    #mc4  = (kurt+3.0)*(mc2**2.0) # 4th central moment
    mc4 = mnc4 - (4*mc*mc3+6*mc*mc*mc2+mc**4)
    return (mc, mc2, mc3, mc4)


from numpy import poly1d,sqrt, exp
import scipy
def _hermnorm(N):
    # return the negatively normalized hermite polynomials up to order N-1
    #  (inclusive)
    #  using the recursive relationship
    #  p_n+1 = p_n(x)' - x*p_n(x)
    #   and p_0(x) = 1
    plist = [None]*N
    plist[0] = poly1d(1)
    for n in range(1,N):
        plist[n] = plist[n-1].deriv() - poly1d([1,0])*plist[n-1]
    return plist

def pdf_moments_st(cnt):
    """Return the Gaussian expanded pdf function given the list of central
    moments (first one is mean).
    """
   
    N = len(cnt)
    if N < 2:
        raise ValueError, "At least two moments must be given to" + \
              "approximate the pdf."
    
    totp = poly1d(1)
    sig = sqrt(cnt[1])
    mu = cnt[0]
    if N > 2:
        Dvals = _hermnorm(N+1)
    for k in range(3,N+1):
        # Find Ck
        Ck = 0.0
        for n in range((k-3)/2):
            m = k-2*n
            if m % 2: # m is odd
                momdiff = cnt[m-1]
            else:
                momdiff = cnt[m-1] - sig*sig*scipy.factorial2(m-1)
            Ck += Dvals[k][m] / sig**m * momdiff
        # Add to totp
        raise
        print Dvals
        print Ck
        totp = totp +  Ck*Dvals[k]

    def thisfunc(x):
        xn = (x-mu)/sig
        return totp(xn)*exp(-xn*xn/2.0)/sqrt(2*pi)/sig
    return thisfunc, totp

def pdf_mvsk(mvsk):
    """Return the Gaussian expanded pdf function given the list of 1st, 2nd
    moment and skew and Fisher (excess) kurtosis.

    

    Parameters
    ----------
    mvsk : list of mu, mc2, skew, kurt
        distribution is matched to these four moments

    Returns
    -------
    pdffunc : function
        function that evaluates the pdf(x), where x is the non-standardized
        random variable.


    Notes
    -----

    Changed so it works only if four arguments are given. Uses explicit
    formula, not loop.

    This implements a Gram-Charlier expansion of the normal distribution
    where the first 2 moments coincide with those of the normal distribution
    but skew and kurtosis can deviate from it.

    In the Gram-Charlier distribution it is possible that the density
    becomes negative. This is the case when the deviation from the
    normal distribution is too large.

    

    References
    ----------
    http://en.wikipedia.org/wiki/Edgeworth_series
    Johnson N.L., S. Kotz, N. Balakrishnan: Continuous Univariate
    Distributions, Volume 1, 2nd ed., p.30    
    """
    N = len(mvsk)
    if N < 4:
        raise ValueError, "Four moments must be given to" + \
              "approximate the pdf."

    mu, mc2, skew, kurt = mvsk
    
    totp = poly1d(1)
    sig = sqrt(mc2)
    if N > 2:
        Dvals = _hermnorm(N+1)
        C3 = skew/6.0
        C4 = kurt/24.0
        # Note: Hermite polynomial for order 3 in _hermnorm is negative
        # instead of positive
        totp = totp -  C3*Dvals[3] +  C4*Dvals[4]

    def pdffunc(x):
        xn = (x-mu)/sig
        return totp(xn)*np.exp(-xn*xn/2.0)/np.sqrt(2*np.pi)/sig
    return pdffunc

def pdf_moments(cnt):
    """Return the Gaussian expanded pdf function given the list of central
    moments (first one is mean).

    Changed so it works only if four arguments are given. Uses explicit
    formula, not loop.

    Notes
    -----

    This implements a Gram-Charlier expansion of the normal distribution
    where the first 2 moments coincide with those of the normal distribution
    but skew and kurtosis can deviate from it.

    In the Gram-Charlier distribution it is possible that the density
    becomes negative. This is the case when the deviation from the
    normal distribution is too large.

    

    References
    ----------
    http://en.wikipedia.org/wiki/Edgeworth_series
    Johnson N.L., S. Kotz, N. Balakrishnan: Continuous Univariate
    Distributions, Volume 1, 2nd ed., p.30
    """
    N = len(cnt)
    if N < 2:
        raise ValueError, "At least two moments must be given to" + \
              "approximate the pdf."


    
    mc, mc2, mc3, mc4 = cnt
    skew = mc3 / mc2**1.5
    kurt = mc4 / mc2**2.0 - 3.0  # Fisher kurtosis, excess kurtosis
    
    totp = poly1d(1)
    sig = sqrt(cnt[1])
    mu = cnt[0]
    if N > 2:
        Dvals = _hermnorm(N+1)
##    for k in range(3,N+1):
##        # Find Ck
##        Ck = 0.0
##        for n in range((k-3)/2):
##            m = k-2*n
##            if m % 2: # m is odd
##                momdiff = cnt[m-1]
##            else:
##                momdiff = cnt[m-1] - sig*sig*scipy.factorial2(m-1)
##            Ck += Dvals[k][m] / sig**m * momdiff
##        # Add to totp
##        raise
##        print Dvals
##        print Ck
##        totp = totp +  Ck*Dvals[k]
        C3 = skew/6.0
        C4 = kurt/24.0
        totp = totp -  C3*Dvals[3] +  C4*Dvals[4]

    def thisfunc(x):
        xn = (x-mu)/sig
        return totp(xn)*np.exp(-xn*xn/2.0)/np.sqrt(2*np.pi)/sig
    return thisfunc

class NormExpan_gen(distributions.rv_continuous):
    def __init__(self,args, **kwds):
        distributions.rv_continuous.__init__(self,
            name = 'Normal Expansion distribution', shapes = 'alpha',
            extradoc = '''
        The distribution is defined as the Gram-Charlier expansion of
        the normal distribution using the first four moments. The pdf
        is given by

        pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc)

        where xc = (x-mu)/sig is the standardized value of the random variable
        and H(xc,3) and H(xc,4) are Hermite polynomials
        
        Note: This distribution has to be parameterized during
        initialization and instantiation, and does not have a shape
        parameter after instantiation (similar to frozen distribution
        except for location and scale.) Location and scale can be used
        as with other distributions, however note, that they are relative
        to the initialized distribution.
        '''  )
        #print args, kwds
        mode = kwds.get('mode', 'sample')

        if mode == 'sample':                            
            mu,sig,sk,kur = stats.describe(args)[2:]
            self.mvsk = (mu,sig,sk,kur)
            cnt = mvsk2cm(mu,sig,sk,kur)
        elif mode == 'mvsk':
            cnt = mvsk2cm(args)
            self.mvsk = args
        elif mode == 'centmom':
            cnt = args
            self.mvsk = mc2mvsk(cnt)
        else:
            raise ValueError, "mode must be 'mvsk' or centmom"
            
        self.cnt = cnt
        #self.mvsk = (mu,sig,sk,kur)
        #self._pdf = pdf_moments(cnt)
        self._pdf = pdf_mvsk(self.mvsk)

    def _munp(self,n):
        # use pdf integration with _mom0_sc if only _pdf is defined.
        # default stats calculation uses ppf
        return self._mom0_sc(n)

    def _stats_skip(self):
        # skip for now to force numerical integration of pdf for testing
        return self.mvsk
                
rvs = skewnorm.rvs(5,size=100)
normexpan = NormExpan_gen(rvs, mode='sample')

smvsk = stats.describe(rvs)[2:]
print 'sample: mu,sig,sk,kur'
print smvsk

dmvsk = normexpan.stats(moments='mvsk')
print 'normexpan: mu,sig,sk,kur'
print dmvsk
print 'mvsk diff distribution - sample'
print np.array(dmvsk) - np.array(smvsk)
print 'normexpan attributes mvsk'
print mc2mvsk(normexpan.cnt)
print normexpan.mvsk


mc, mnc = mvsk2m(dmvsk)
print 'central moments'
print mc
print 'non-central moments'
print mnc


pdffn = pdf_moments(mc)
print '\npdf approximation from moments'
print 'pdf at', mc[0]-1,mc[0]+1
print pdffn([mc[0]-1,mc[0]+1])
print normexpan.pdf([mc[0]-1,mc[0]+1])
